// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/StringUtils.h"

#include "doctest.h"
#include "ScopedFlags.h"

#include <string.h>

using namespace Luau::CodeGen;
using namespace Luau::CodeGen::X64;

// Renders a byte sequence as a brace-enclosed, comma-separated list of two-digit hex
// literals, e.g. {0x48, 0x03, 0xc1}. An empty input yields "{}".
static std::string bytecodeAsArray(const std::vector<uint8_t>& bytecode)
{
    std::string text = "{";

    // No separator before the first element; ", " before every following one
    const char* separator = "";

    for (uint8_t value : bytecode)
    {
        Luau::formatAppend(text, "%s0x%02x", separator, value);
        separator = ", ";
    }

    text += "}";
    return text;
}

// Fixture for the encoding tests: runs a snippet against a fresh builder and compares the
// finalized code and data buffers with the expected bytes.
class AssemblyBuilderX64Fixture
{
public:
    // Calls `f` on a new AssemblyBuilderX64 (text logging disabled), finalizes the builder and
    // returns true only if build.code equals `code` and build.data equals `data`.
    // Code is compared first; the first mismatch found is printed to stdout (expected vs received)
    // and the function returns false without checking anything further.
    bool check(void (*f)(AssemblyBuilderX64& build), std::vector<uint8_t> code, std::vector<uint8_t> data = {})
    {
        AssemblyBuilderX64 build(/* logText= */ false);
        f(build);
        build.finalize();

        if (build.code != code)
        {
            const std::string expectedCode = bytecodeAsArray(code);
            const std::string receivedCode = bytecodeAsArray(build.code);

            printf("Expected code: %s\nReceived code: %s\n", expectedCode.c_str(), receivedCode.c_str());
            return false;
        }

        if (build.data != data)
        {
            const std::string expectedData = bytecodeAsArray(data);
            const std::string receivedData = bytecodeAsArray(build.data);

            printf("Expected data: %s\nReceived data: %s\n", expectedData.c_str(), receivedData.c_str());
            return false;
        }

        return true;
    }
};

// Test cases from here until the matching TEST_SUITE_END() belong to the "x64Assembly" suite
TEST_SUITE_BEGIN("x64Assembly");

// Checks that the single builder call `build.inst` produces exactly the code bytes given as the
// remaining arguments. No data argument is passed to check(), so its default (empty data) applies.
// The macro calls check() unqualified, so it is meant to be used inside test cases that use
// AssemblyBuilderX64Fixture.
#define SINGLE_COMPARE(inst, ...) \
    CHECK(check( \
        [](AssemblyBuilderX64& build) \
        { \
            build.inst; \
        }, \
        {__VA_ARGS__} \
    ))

// Covers the operand forms of a two-operand integer instruction, using `add` throughout.
// Each line pairs one builder call with the exact bytes it is expected to emit; the groups
// below vary the destination/source registers and the addressing mode of the memory operand.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseBinaryInstructionForms")
{
    // reg, reg
    SINGLE_COMPARE(add(rax, rcx), 0x48, 0x03, 0xc1);
    SINGLE_COMPARE(add(rsp, r12), 0x49, 0x03, 0xe4);
    SINGLE_COMPARE(add(r14, r10), 0x4d, 0x03, 0xf2);

    // reg, imm
    // (0x7f is still expected as a 1-byte immediate, 0x80 and above as a 4-byte immediate)
    SINGLE_COMPARE(add(rax, 0), 0x48, 0x83, 0xc0, 0x00);
    SINGLE_COMPARE(add(rax, 0x7f), 0x48, 0x83, 0xc0, 0x7f);
    SINGLE_COMPARE(add(rax, 0x80), 0x48, 0x81, 0xc0, 0x80, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r10, 0x7fffffff), 0x49, 0x81, 0xc2, 0xff, 0xff, 0xff, 0x7f);
    SINGLE_COMPARE(add(al, 3), 0x80, 0xc0, 0x03);
    SINGLE_COMPARE(add(sil, 3), 0x48, 0x80, 0xc6, 0x03);
    SINGLE_COMPARE(add(r11b, 3), 0x49, 0x80, 0xc3, 0x03);

    // reg, [reg]
    // (rsp/r12 bases are expected with an extra 0x24 byte, rbp/r13 bases with an explicit zero displacement)
    SINGLE_COMPARE(add(rax, qword[rax]), 0x48, 0x03, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx]), 0x48, 0x03, 0x03);
    SINGLE_COMPARE(add(rax, qword[rsp]), 0x48, 0x03, 0x04, 0x24);
    SINGLE_COMPARE(add(rax, qword[rbp]), 0x48, 0x03, 0x45, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10]), 0x49, 0x03, 0x02);
    SINGLE_COMPARE(add(rax, qword[r12]), 0x49, 0x03, 0x04, 0x24);
    SINGLE_COMPARE(add(rax, qword[r13]), 0x49, 0x03, 0x45, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax]), 0x4c, 0x03, 0x20);
    SINGLE_COMPARE(add(r12, qword[rbx]), 0x4c, 0x03, 0x23);
    SINGLE_COMPARE(add(r12, qword[rsp]), 0x4c, 0x03, 0x24, 0x24);
    SINGLE_COMPARE(add(r12, qword[rbp]), 0x4c, 0x03, 0x65, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10]), 0x4d, 0x03, 0x22);
    SINGLE_COMPARE(add(r12, qword[r12]), 0x4d, 0x03, 0x24, 0x24);
    SINGLE_COMPARE(add(r12, qword[r13]), 0x4d, 0x03, 0x65, 0x00);

    // reg, [base+imm8]
    SINGLE_COMPARE(add(rax, qword[rax + 0x1b]), 0x48, 0x03, 0x40, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rbx + 0x1b]), 0x48, 0x03, 0x43, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rsp + 0x1b]), 0x48, 0x03, 0x44, 0x24, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rbp + 0x1b]), 0x48, 0x03, 0x45, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r10 + 0x1b]), 0x49, 0x03, 0x42, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r12 + 0x1b]), 0x49, 0x03, 0x44, 0x24, 0x1b);
    SINGLE_COMPARE(add(rax, qword[r13 + 0x1b]), 0x49, 0x03, 0x45, 0x1b);

    SINGLE_COMPARE(add(r12, qword[rax + 0x1b]), 0x4c, 0x03, 0x60, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rbx + 0x1b]), 0x4c, 0x03, 0x63, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rsp + 0x1b]), 0x4c, 0x03, 0x64, 0x24, 0x1b);
    SINGLE_COMPARE(add(r12, qword[rbp + 0x1b]), 0x4c, 0x03, 0x65, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r10 + 0x1b]), 0x4d, 0x03, 0x62, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r12 + 0x1b]), 0x4d, 0x03, 0x64, 0x24, 0x1b);
    SINGLE_COMPARE(add(r12, qword[r13 + 0x1b]), 0x4d, 0x03, 0x65, 0x1b);

    // reg, [base+imm32]
    SINGLE_COMPARE(add(rax, qword[rax + 0xabab]), 0x48, 0x03, 0x80, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx + 0xabab]), 0x48, 0x03, 0x83, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rsp + 0xabab]), 0x48, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbp + 0xabab]), 0x48, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10 + 0xabab]), 0x49, 0x03, 0x82, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r12 + 0xabab]), 0x49, 0x03, 0x84, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r13 + 0xabab]), 0x49, 0x03, 0x85, 0xab, 0xab, 0x00, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax + 0xabab]), 0x4c, 0x03, 0xa0, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbx + 0xabab]), 0x4c, 0x03, 0xa3, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rsp + 0xabab]), 0x4c, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbp + 0xabab]), 0x4c, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10 + 0xabab]), 0x4d, 0x03, 0xa2, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r12 + 0xabab]), 0x4d, 0x03, 0xa4, 0x24, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r13 + 0xabab]), 0x4d, 0x03, 0xa5, 0xab, 0xab, 0x00, 0x00);

    // reg, [index*scale]
    // (without a base register, a 4-byte zero displacement is expected)
    SINGLE_COMPARE(add(rax, qword[rax * 2]), 0x48, 0x03, 0x04, 0x45, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbx * 2]), 0x48, 0x03, 0x04, 0x5d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rbp * 2]), 0x48, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r10 * 2]), 0x4a, 0x03, 0x04, 0x55, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r12 * 2]), 0x4a, 0x03, 0x04, 0x65, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[r13 * 2]), 0x4a, 0x03, 0x04, 0x6d, 0x00, 0x00, 0x00, 0x00);

    SINGLE_COMPARE(add(r12, qword[rax * 2]), 0x4c, 0x03, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbx * 2]), 0x4c, 0x03, 0x24, 0x5d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rbp * 2]), 0x4c, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r10 * 2]), 0x4e, 0x03, 0x24, 0x55, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r12 * 2]), 0x4e, 0x03, 0x24, 0x65, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[r13 * 2]), 0x4e, 0x03, 0x24, 0x6d, 0x00, 0x00, 0x00, 0x00);

    // reg, [base+index*scale+imm]
    SINGLE_COMPARE(add(rax, qword[rax + rax * 2]), 0x48, 0x03, 0x04, 0x40);
    SINGLE_COMPARE(add(rax, qword[rax + rbx * 2 + 0x1b]), 0x48, 0x03, 0x44, 0x58, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + rbp * 2]), 0x48, 0x03, 0x04, 0x68);
    SINGLE_COMPARE(add(rax, qword[rax + rbp + 0xabab]), 0x48, 0x03, 0x84, 0x28, 0xAB, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rax + r12 + 0x1b]), 0x4a, 0x03, 0x44, 0x20, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + r12 * 4 + 0xabab]), 0x4a, 0x03, 0x84, 0xa0, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[rax + r13 * 2 + 0x1b]), 0x4a, 0x03, 0x44, 0x68, 0x1b);
    SINGLE_COMPARE(add(rax, qword[rax + r13 + 0xabab]), 0x4a, 0x03, 0x84, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rax + r12 * 2]), 0x4e, 0x03, 0x24, 0x60);
    SINGLE_COMPARE(add(r12, qword[rax + r13 + 0xabab]), 0x4e, 0x03, 0xA4, 0x28, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(r12, qword[rax + rbp * 2 + 0x1b]), 0x4c, 0x03, 0x64, 0x68, 0x1b);

    // reg, [imm32]
    SINGLE_COMPARE(add(rax, qword[0]), 0x48, 0x03, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(add(rax, qword[0xabab]), 0x48, 0x03, 0x04, 0x25, 0xab, 0xab, 0x00, 0x00);

    // [addr], reg
    SINGLE_COMPARE(add(qword[rax], rax), 0x48, 0x01, 0x00);
    SINGLE_COMPARE(add(qword[rax + rax * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0x80, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rbx + rax * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x43, 0x1b);
    SINGLE_COMPARE(add(qword[rbx + rbp * 2 + 0x1b], rax), 0x48, 0x01, 0x44, 0x6b, 0x1b);
    SINGLE_COMPARE(add(qword[rbp + rbp * 4 + 0xabab], rax), 0x48, 0x01, 0x84, 0xad, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rbp + r12 + 0x1b], rax), 0x4a, 0x01, 0x44, 0x25, 0x1b);
    SINGLE_COMPARE(add(qword[r12], rax), 0x49, 0x01, 0x04, 0x24);
    SINGLE_COMPARE(add(qword[r13 + rbx + 0xabab], rax), 0x49, 0x01, 0x84, 0x1d, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rax + r13 * 2 + 0x1b], rsi), 0x4a, 0x01, 0x74, 0x68, 0x1b);
    SINGLE_COMPARE(add(qword[rbp + rbx * 2], rsi), 0x48, 0x01, 0x74, 0x5d, 0x00);
    SINGLE_COMPARE(add(qword[rsp + r10 * 2 + 0x1b], r10), 0x4e, 0x01, 0x54, 0x54, 0x1b);

    // [addr], imm
    SINGLE_COMPARE(add(byte[rax], 2), 0x80, 0x00, 0x02);
    SINGLE_COMPARE(add(dword[rax], 2), 0x83, 0x00, 0x02);
    SINGLE_COMPARE(add(dword[rax], 0xabcd), 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(add(qword[rax], 2), 0x48, 0x83, 0x00, 0x02);
    SINGLE_COMPARE(add(qword[rax], 0xabcd), 0x48, 0x81, 0x00, 0xcd, 0xab, 0x00, 0x00);
}

// Expected encodings for single-operand integer instructions (div/idiv/mul/imul/neg/not/inc/dec)
// with 64-bit register, 32-bit register and memory operands.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "BaseUnaryInstructionForms")
{
    SINGLE_COMPARE(div(rcx), 0x48, 0xf7, 0xf1);
    SINGLE_COMPARE(idiv(qword[rax]), 0x48, 0xf7, 0x38);
    SINGLE_COMPARE(mul(qword[rax + rbx]), 0x48, 0xf7, 0x24, 0x18);
    SINGLE_COMPARE(imul(r9), 0x49, 0xf7, 0xe9);
    SINGLE_COMPARE(neg(r9), 0x49, 0xf7, 0xd9);
    SINGLE_COMPARE(not_(r12), 0x49, 0xf7, 0xd4);
    SINGLE_COMPARE(inc(r12), 0x49, 0xff, 0xc4);
    SINGLE_COMPARE(dec(ecx), 0xff, 0xc9);
    SINGLE_COMPARE(dec(byte[rdx]), 0xfe, 0x0a);
}

// Expected encodings for mov/mov64: immediates into 8/16/32/64-bit registers, loads and stores
// through memory operands, and immediates stored to memory.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMov")
{
    // Note: mov of a small immediate into a 64-bit register is still expected with an 8-byte immediate
    SINGLE_COMPARE(mov(rcx, 1), 0x48, 0xb9, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov64(rcx, 0x1234567812345678ll), 0x48, 0xb9, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
    SINGLE_COMPARE(mov(ecx, 2), 0xb9, 0x02, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov(cl, 2), 0xb1, 0x02);
    SINGLE_COMPARE(mov(sil, 2), 0x48, 0xb6, 0x02);
    SINGLE_COMPARE(mov(r9b, 2), 0x49, 0xb1, 0x02);
    SINGLE_COMPARE(mov(rcx, qword[rdi]), 0x48, 0x8b, 0x0f);
    SINGLE_COMPARE(mov(dword[rax], 0xabcd), 0xc7, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(mov(r13, 1), 0x49, 0xbd, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov64(r13, 0x1234567812345678ll), 0x49, 0xbd, 0x78, 0x56, 0x34, 0x12, 0x78, 0x56, 0x34, 0x12);
    SINGLE_COMPARE(mov(r13d, 2), 0x41, 0xbd, 0x02, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(mov(r13, qword[r12]), 0x4d, 0x8b, 0x2c, 0x24);
    SINGLE_COMPARE(mov(dword[r13], 0xabcd), 0x41, 0xc7, 0x45, 0x00, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(mov(qword[rdx], r9), 0x4c, 0x89, 0x0a);
    SINGLE_COMPARE(mov(byte[rsi], 0x3), 0xc6, 0x06, 0x03);
    SINGLE_COMPARE(mov(byte[rsi], al), 0x88, 0x06);
    SINGLE_COMPARE(mov(byte[rsi], dil), 0x48, 0x88, 0x3e);
    SINGLE_COMPARE(mov(byte[rsi], r10b), 0x4c, 0x88, 0x16);
    // 16-bit forms: every expected sequence starts with a 0x66 byte
    SINGLE_COMPARE(mov(wordReg(ebx), 0x3a3d), 0x66, 0xbb, 0x3d, 0x3a);
    SINGLE_COMPARE(mov(word[rsi], 0x3a3d), 0x66, 0xc7, 0x06, 0x3d, 0x3a);
    SINGLE_COMPARE(mov(word[rsi], wordReg(eax)), 0x66, 0x89, 0x06);
    SINGLE_COMPARE(mov(word[rsi], wordReg(edi)), 0x66, 0x89, 0x3e);
    SINGLE_COMPARE(mov(word[rsi], wordReg(r10)), 0x66, 0x44, 0x89, 0x16);
}

// Expected encodings for movsx/movzx loading byte and word memory operands into
// 32-bit and 64-bit registers.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfMovExtended")
{
    SINGLE_COMPARE(movsx(eax, byte[rcx]), 0x0f, 0xbe, 0x01);
    SINGLE_COMPARE(movsx(r12, byte[r10]), 0x4d, 0x0f, 0xbe, 0x22);
    SINGLE_COMPARE(movsx(ebx, word[r11]), 0x41, 0x0f, 0xbf, 0x1b);
    SINGLE_COMPARE(movsx(rdx, word[rcx]), 0x48, 0x0f, 0xbf, 0x11);
    SINGLE_COMPARE(movzx(eax, byte[rcx]), 0x0f, 0xb6, 0x01);
    SINGLE_COMPARE(movzx(r12, byte[r10]), 0x4d, 0x0f, 0xb6, 0x22);
    SINGLE_COMPARE(movzx(ebx, word[r11]), 0x41, 0x0f, 0xb7, 0x1b);
    SINGLE_COMPARE(movzx(rdx, word[rcx]), 0x48, 0x0f, 0xb7, 0x11);
}

// Expected encodings for `test` with register/immediate, register/register and
// register/memory operands.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfTest")
{
    // Immediates for the 32/64-bit forms are expected as 4 bytes even when the value fits in one
    SINGLE_COMPARE(test(al, 8), 0xf6, 0xc0, 0x08);
    SINGLE_COMPARE(test(eax, 8), 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(test(rax, 8), 0x48, 0xf7, 0xc0, 0x08, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(test(rcx, 0xabab), 0x48, 0xf7, 0xc1, 0xab, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(test(rcx, rax), 0x48, 0x85, 0xc8);
    SINGLE_COMPARE(test(rax, qword[rcx]), 0x48, 0x85, 0x01);
}

// Expected encodings for shifts and rotates (shl/shr/sal/sar/rol/ror) with three kinds of
// count operand: the constant 1, another immediate, and the cl register.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfShift")
{
    // A count of 1 is expected without an immediate byte; other constants carry a 1-byte immediate
    SINGLE_COMPARE(shl(al, 1), 0xd0, 0xe0);
    SINGLE_COMPARE(shl(al, cl), 0xd2, 0xe0);
    SINGLE_COMPARE(shl(sil, cl), 0x48, 0xd2, 0xe6);
    SINGLE_COMPARE(shl(r10b, cl), 0x49, 0xd2, 0xe2);
    SINGLE_COMPARE(shr(al, 4), 0xc0, 0xe8, 0x04);
    SINGLE_COMPARE(shr(eax, 1), 0xd1, 0xe8);
    SINGLE_COMPARE(sal(eax, cl), 0xd3, 0xe0);
    SINGLE_COMPARE(sal(eax, 4), 0xc1, 0xe0, 0x04);
    SINGLE_COMPARE(sar(rax, 4), 0x48, 0xc1, 0xf8, 0x04);
    SINGLE_COMPARE(sar(r11, 1), 0x49, 0xd1, 0xfb);
    SINGLE_COMPARE(rol(eax, 1), 0xd1, 0xc0);
    SINGLE_COMPARE(rol(eax, cl), 0xd3, 0xc0);
    SINGLE_COMPARE(ror(eax, 1), 0xd1, 0xc8);
    SINGLE_COMPARE(ror(eax, cl), 0xd3, 0xc8);
}

// Expected encodings for `lea` with base+index, base+index*scale and
// base+index*scale+displacement address operands.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfLea")
{
    SINGLE_COMPARE(lea(rax, addr[rdx + rcx]), 0x48, 0x8d, 0x04, 0x0a);
    SINGLE_COMPARE(lea(rax, addr[rdx + rax * 4]), 0x48, 0x8d, 0x04, 0x82);
    SINGLE_COMPARE(lea(rax, addr[r13 + r12 * 4 + 4]), 0x4b, 0x8d, 0x44, 0xa5, 0x04);
}

// Expected encodings for `setcc` writing to byte registers and to a byte memory operand.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfSetcc")
{
    SINGLE_COMPARE(setcc(ConditionX64::NotEqual, bl), 0x0f, 0x95, 0xc3);
    SINGLE_COMPARE(setcc(ConditionX64::NotEqual, dil), 0x48, 0x0f, 0x95, 0xc7);
    SINGLE_COMPARE(setcc(ConditionX64::BelowEqual, byte[rcx]), 0x0f, 0x96, 0x01);
}

// Expected encodings for conditional moves with register and memory sources
// under several conditions.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfCmov")
{
    SINGLE_COMPARE(cmov(ConditionX64::LessEqual, ebx, eax), 0x0f, 0x4e, 0xd8);
    SINGLE_COMPARE(cmov(ConditionX64::NotZero, rbx, qword[rax]), 0x48, 0x0f, 0x45, 0x18);
    SINGLE_COMPARE(cmov(ConditionX64::Zero, rbx, qword[rax + rcx]), 0x48, 0x0f, 0x44, 0x1c, 0x08);
    SINGLE_COMPARE(cmov(ConditionX64::BelowEqual, r14d, r15d), 0x45, 0x0f, 0x46, 0xf7);
}

// Expected encodings for jmp/call through a register or a memory operand
// (as opposed to the label-based forms tested in ControlFlow and LabelCall).
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfAbsoluteJumps")
{
    SINGLE_COMPARE(jmp(rax), 0xff, 0xe0);
    SINGLE_COMPARE(jmp(r14), 0x41, 0xff, 0xe6);
    SINGLE_COMPARE(jmp(qword[r14 + rdx * 4]), 0x41, 0xff, 0x24, 0x96);
    SINGLE_COMPARE(call(rax), 0xff, 0xd0);
    SINGLE_COMPARE(call(r14), 0x41, 0xff, 0xd6);
    SINGLE_COMPARE(call(qword[r14 + rdx * 4]), 0x41, 0xff, 0x14, 0x96);
}

// Expected encodings for the two-operand and three-operand (with immediate) forms of imul.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "FormsOfImul")
{
    SINGLE_COMPARE(imul(ecx, esi), 0x0f, 0xaf, 0xce);
    SINGLE_COMPARE(imul(r12, rax), 0x4c, 0x0f, 0xaf, 0xe0);
    SINGLE_COMPARE(imul(r12, qword[rdx + rdi]), 0x4c, 0x0f, 0xaf, 0x24, 0x3a);
    // Three-operand forms: 8, 17, -9 and -13 are expected as a single immediate byte, 0xabcd as four bytes
    SINGLE_COMPARE(imul(ecx, edx, 8), 0x6b, 0xca, 0x08);
    SINGLE_COMPARE(imul(ecx, r9d, 0xabcd), 0x41, 0x69, 0xc9, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(imul(r8d, eax, -9), 0x44, 0x6b, 0xc0, 0xf7);
    SINGLE_COMPARE(imul(rcx, rdx, 17), 0x48, 0x6b, 0xca, 0x11);
    SINGLE_COMPARE(imul(rcx, r12, 0xabcd), 0x49, 0x69, 0xcc, 0xcd, 0xab, 0x00, 0x00);
    SINGLE_COMPARE(imul(r12, rax, -13), 0x4c, 0x6b, 0xe0, 0xf3);
}

// Expected byte sequences for nop() and nop(n): for each tested n the expected output is
// exactly n bytes long. nop(15) is expected as the 9-byte form followed by the 6-byte form.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "NopForms")
{
    SINGLE_COMPARE(nop(), 0x90);
    SINGLE_COMPARE(nop(2), 0x66, 0x90);
    SINGLE_COMPARE(nop(3), 0x0f, 0x1f, 0x00);
    SINGLE_COMPARE(nop(4), 0x0f, 0x1f, 0x40, 0x00);
    SINGLE_COMPARE(nop(5), 0x0f, 0x1f, 0x44, 0x00, 0x00);
    SINGLE_COMPARE(nop(6), 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00);
    SINGLE_COMPARE(nop(7), 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(8), 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(9), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00);
    SINGLE_COMPARE(nop(15), 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00); // 9+6
}

// Each case emits a 1-byte ret (0xc3) and then aligns the code; the expected bytes show the
// padding produced for each AlignmentDataX64 kind.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentForms")
{
    // Align to 8 with nops: the 7 padding bytes match the nop(7) sequence
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(8, AlignmentDataX64::Nop);
        },
        {0xc3, 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}
    ));

    // Align to 32 with nops: the 31 padding bytes are expected as 9 + 9 + 9 + 4 byte nops
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(32, AlignmentDataX64::Nop);
        },
        {0xc3, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x1f, 0x40, 0x00}
    ));

    // Align to 8 with int3: seven 0xcc bytes
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(8, AlignmentDataX64::Int3);
        },
        {0xc3, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc}
    ));

    // Align to 8 with ud2: three 2-byte ud2 (0x0f 0x0b) and a single 0xcc for the odd remaining byte
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            build.ret();
            build.align(8, AlignmentDataX64::Ud2);
        },
        {0xc3, 0x0f, 0x0b, 0x0f, 0x0b, 0x0f, 0x0b, 0xcc}
    ));
}

// Smoke test without explicit assertions: the scenarios only need to run to completion.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AlignmentOverflow")
{
    // Test that alignment correctly resizes the code buffer
    // Helper: a fresh builder emits one ret, aligns to 8192 using the given padding kind, then finalizes
    auto retThenAlign = [](AlignmentDataX64 padding)
    {
        AssemblyBuilderX64 build(/* logText */ false);

        build.ret();
        build.align(8192, padding);
        build.finalize();
    };

    retThenAlign(AlignmentDataX64::Nop);
    retThenAlign(AlignmentDataX64::Int3);

    // Same amount of output, but produced one int3 instruction at a time
    {
        AssemblyBuilderX64 build(/* logText */ false);

        int remaining = 8192;
        while (remaining > 0)
        {
            build.int3();
            remaining--;
        }

        build.finalize();
    }

    retThenAlign(AlignmentDataX64::Ud2);
}

// Label-based jumps: checks the 32-bit relative offsets expected for backward and forward
// references. In every expected sequence the offset is the last four bytes of the jump
// instruction and is counted from the end of that instruction.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "ControlFlow")
{
    // Jump back
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label start = build.setLabel();
            build.add(rsi, 1);
            build.cmp(rsi, rdi);
            build.jcc(ConditionX64::Equal, start);
        },
        // offset 0xfffffff3 = -13: from the end of the 13-byte sequence back to `start` at offset 0
        {0x48, 0x83, 0xc6, 0x01, 0x48, 0x3b, 0xf7, 0x0f, 0x84, 0xf3, 0xff, 0xff, 0xff}
    ));

    // Jump back, but the label is declared first and only placed after the add
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label start;
            build.add(rsi, 1);
            build.setLabel(start);
            build.cmp(rsi, rdi);
            build.jcc(ConditionX64::Equal, start);
        },
        // offset 0xfffffff7 = -9: back to offset 4, just past the 4-byte add
        {0x48, 0x83, 0xc6, 0x01, 0x48, 0x3b, 0xf7, 0x0f, 0x84, 0xf7, 0xff, 0xff, 0xff}
    ));

    // Jump forward
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label skip;

            build.cmp(rsi, rdi);
            build.jcc(ConditionX64::Greater, skip);
            build.or_(rdi, 0x3e);
            build.setLabel(skip);
        },
        // offset 4: skips over the 4-byte `or`
        {0x48, 0x3b, 0xf7, 0x0f, 0x8f, 0x04, 0x00, 0x00, 0x00, 0x48, 0x83, 0xcf, 0x3e}
    ));

    // Regular jump
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label skip;

            build.jmp(skip);
            build.and_(rdi, 0x3e);
            build.setLabel(skip);
        },
        // offset 4: skips over the 4-byte `and`
        {0xe9, 0x04, 0x00, 0x00, 0x00, 0x48, 0x83, 0xe7, 0x3e}
    ));
}

// Call to a label that is placed later in the same code stream.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "LabelCall")
{
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label fnB;

            build.and_(rcx, 0x3e);
            build.call(fnB);
            build.ret();

            build.setLabel(fnB);
            build.lea(rax, addr[rcx + 0x1f]);
            build.ret();
        },
        // call offset is 1: fnB starts right after the 1-byte ret that follows the call
        {0x48, 0x83, 0xe1, 0x3e, 0xe8, 0x01, 0x00, 0x00, 0x00, 0xc3, 0x48, 0x8d, 0x41, 0x1f, 0xc3}
    ));
}

// Expected encodings for three-operand AVX instructions (dst, src1, src2). vadd* is covered in
// its packed/scalar, single/double, xmm/ymm and register/memory variants; the remaining
// instructions are spot-checked with one register form each.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXBinaryInstructionForms")
{
    SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x58, 0xc6);
    SINGLE_COMPARE(vaddpd(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x29, 0x58, 0x01);
    SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymm14), 0xc4, 0x41, 0x2d, 0x58, 0xc6);
    SINGLE_COMPARE(vaddpd(ymm8, ymm10, ymmword[r9]), 0xc4, 0x41, 0x2d, 0x58, 0x01);
    SINGLE_COMPARE(vaddps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x58, 0xc6);
    SINGLE_COMPARE(vaddps(xmm8, xmm10, xmmword[r9]), 0xc4, 0x41, 0x28, 0x58, 0x01);
    SINGLE_COMPARE(vaddsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x58, 0xc6);
    SINGLE_COMPARE(vaddsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x58, 0x01);
    SINGLE_COMPARE(vaddss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x58, 0xc6);
    SINGLE_COMPARE(vaddss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x58, 0x01);

    // Low-numbered registers and a base+index*scale+displacement memory operand
    SINGLE_COMPARE(vaddps(xmm1, xmm2, xmm3), 0xc4, 0xe1, 0x68, 0x58, 0xcb);
    SINGLE_COMPARE(vaddps(xmm9, xmm12, xmmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x18, 0x58, 0x4c, 0x71, 0x1c);
    SINGLE_COMPARE(vaddps(ymm1, ymm2, ymm3), 0xc4, 0xe1, 0x6c, 0x58, 0xcb);
    SINGLE_COMPARE(vaddps(ymm9, ymm12, ymmword[r9 + r14 * 2 + 0x1c]), 0xc4, 0x01, 0x1c, 0x58, 0x4c, 0x71, 0x1c);

    // Coverage for other instructions that follow the same pattern
    SINGLE_COMPARE(vsubsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5c, 0xc6);
    SINGLE_COMPARE(vmulsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x59, 0xc6);
    SINGLE_COMPARE(vdivsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5e, 0xc6);

    SINGLE_COMPARE(vsubps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x5c, 0xc6);
    SINGLE_COMPARE(vmulps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x59, 0xc6);
    SINGLE_COMPARE(vdivps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x5e, 0xc6);

    SINGLE_COMPARE(vorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x56, 0xc6);
    SINGLE_COMPARE(vxorpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x57, 0xc6);
    SINGLE_COMPARE(vorps(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x28, 0x56, 0xc6);

    SINGLE_COMPARE(vandpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x54, 0xc6);
    SINGLE_COMPARE(vandnpd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x29, 0x55, 0xc6);

    SINGLE_COMPARE(vmaxsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5f, 0xc6);
    SINGLE_COMPARE(vminsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x5d, 0xc6);

    // The two compare helpers are expected to differ only in the trailing immediate byte (0x00 vs 0x01)
    SINGLE_COMPARE(vcmpeqsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0xc2, 0xc6, 0x00);
    SINGLE_COMPARE(vcmpltsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0xc2, 0xc6, 0x01);
}

// Expected encodings for the vsqrt* family: the packed forms take (dst, src) while the scalar
// forms take (dst, src1, src2). vucomisd is checked as another two-operand instruction.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXUnaryMergeInstructionForms")
{
    SINGLE_COMPARE(vsqrtpd(xmm8, xmm10), 0xc4, 0x41, 0x79, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtpd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtpd(ymm8, ymm10), 0xc4, 0x41, 0x7d, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtpd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtps(xmm8, xmm10), 0xc4, 0x41, 0x78, 0x51, 0xc2);
    SINGLE_COMPARE(vsqrtps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x51, 0xc6);
    SINGLE_COMPARE(vsqrtsd(xmm8, xmm10, qword[r9]), 0xc4, 0x41, 0x2b, 0x51, 0x01);
    SINGLE_COMPARE(vsqrtss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x51, 0xc6);
    SINGLE_COMPARE(vsqrtss(xmm8, xmm10, dword[r9]), 0xc4, 0x41, 0x2a, 0x51, 0x01);

    // Coverage for other instructions that follow the same pattern
    SINGLE_COMPARE(vucomisd(xmm1, xmm4), 0xc4, 0xe1, 0x79, 0x2e, 0xcc);
}

// Expected encodings for AVX moves: scalar vmovsd/vmovss (store, load and three-register
// forms), aligned/unaligned packed moves (load and store), and vmovq between xmm and
// general-purpose registers or memory.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXMoveInstructionForms")
{
    SINGLE_COMPARE(vmovsd(qword[r9], xmm10), 0xc4, 0x41, 0x7b, 0x11, 0x11);
    SINGLE_COMPARE(vmovsd(xmm8, qword[r9]), 0xc4, 0x41, 0x7b, 0x10, 0x01);
    SINGLE_COMPARE(vmovsd(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2b, 0x10, 0xc6);
    SINGLE_COMPARE(vmovss(dword[r9], xmm10), 0xc4, 0x41, 0x7a, 0x11, 0x11);
    SINGLE_COMPARE(vmovss(xmm8, dword[r9]), 0xc4, 0x41, 0x7a, 0x10, 0x01);
    SINGLE_COMPARE(vmovss(xmm8, xmm10, xmm14), 0xc4, 0x41, 0x2a, 0x10, 0xc6);
    SINGLE_COMPARE(vmovapd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x28, 0x01);
    SINGLE_COMPARE(vmovapd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x29, 0x11);
    SINGLE_COMPARE(vmovapd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x28, 0x01);
    SINGLE_COMPARE(vmovaps(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x28, 0x01);
    SINGLE_COMPARE(vmovaps(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x29, 0x11);
    SINGLE_COMPARE(vmovaps(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x28, 0x01);
    SINGLE_COMPARE(vmovupd(xmm8, xmmword[r9]), 0xc4, 0x41, 0x79, 0x10, 0x01);
    SINGLE_COMPARE(vmovupd(xmmword[r9], xmm10), 0xc4, 0x41, 0x79, 0x11, 0x11);
    SINGLE_COMPARE(vmovupd(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7d, 0x10, 0x01);
    SINGLE_COMPARE(vmovups(xmm8, xmmword[r9]), 0xc4, 0x41, 0x78, 0x10, 0x01);
    SINGLE_COMPARE(vmovups(xmmword[r9], xmm10), 0xc4, 0x41, 0x78, 0x11, 0x11);
    SINGLE_COMPARE(vmovups(ymm8, ymmword[r9]), 0xc4, 0x41, 0x7c, 0x10, 0x01);
    SINGLE_COMPARE(vmovq(xmm1, rbx), 0xc4, 0xe1, 0xf9, 0x6e, 0xcb);
    SINGLE_COMPARE(vmovq(rbx, xmm1), 0xc4, 0xe1, 0xf9, 0x7e, 0xcb);
    SINGLE_COMPARE(vmovq(xmm1, qword[r9]), 0xc4, 0xc1, 0xf9, 0x6e, 0x09);
    SINGLE_COMPARE(vmovq(qword[r9], xmm1), 0xc4, 0xc1, 0xf9, 0x7e, 0x09);
}

// Expected encodings for AVX conversions between doubles, floats and 32/64-bit integers,
// each with a register source and a memory source.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXConversionInstructionForms")
{
    SINGLE_COMPARE(vcvttsd2si(ecx, xmm0), 0xc4, 0xe1, 0x7b, 0x2c, 0xc8);
    SINGLE_COMPARE(vcvttsd2si(r9d, xmmword[rcx + rdx]), 0xc4, 0x61, 0x7b, 0x2c, 0x0c, 0x11);
    SINGLE_COMPARE(vcvttsd2si(rdx, xmm0), 0xc4, 0xe1, 0xfb, 0x2c, 0xd0);
    SINGLE_COMPARE(vcvttsd2si(r13, xmmword[rcx + rdx]), 0xc4, 0x61, 0xfb, 0x2c, 0x2c, 0x11);
    SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, ecx), 0xc4, 0xe1, 0x2b, 0x2a, 0xe9);
    SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, dword[rcx + rdx]), 0xc4, 0xe1, 0x23, 0x2a, 0x34, 0x11);
    SINGLE_COMPARE(vcvtsi2sd(xmm5, xmm10, r13), 0xc4, 0xc1, 0xab, 0x2a, 0xed);
    SINGLE_COMPARE(vcvtsi2sd(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x2a, 0x34, 0x11);
    SINGLE_COMPARE(vcvtsd2ss(xmm5, xmm10, xmm11), 0xc4, 0xc1, 0x2b, 0x5a, 0xeb);
    SINGLE_COMPARE(vcvtsd2ss(xmm6, xmm11, qword[rcx + rdx]), 0xc4, 0xe1, 0xa3, 0x5a, 0x34, 0x11);
    SINGLE_COMPARE(vcvtss2sd(xmm3, xmm8, xmm12), 0xc4, 0xc1, 0x3a, 0x5a, 0xdc);
    SINGLE_COMPARE(vcvtss2sd(xmm4, xmm9, dword[rcx + rsi]), 0xc4, 0xe1, 0x32, 0x5a, 0x24, 0x31);
}

// Expected encodings for AVX instructions that take a fourth operand (rounding mode,
// immediate, or an extra register) in addition to dst, src1 and src2. In every expected
// sequence that fourth operand shows up in the last byte.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "AVXTernaryInstructionForms")
{
    SINGLE_COMPARE(vroundsd(xmm7, xmm12, xmm3, RoundingModeX64::RoundToNegativeInfinity), 0xc4, 0xe3, 0x19, 0x0b, 0xfb, 0x09);
    SINGLE_COMPARE(
        vroundsd(xmm8, xmm13, xmmword[r13 + rdx], RoundingModeX64::RoundToPositiveInfinity), 0xc4, 0x43, 0x11, 0x0b, 0x44, 0x15, 0x00, 0x0a
    );
    SINGLE_COMPARE(vroundsd(xmm9, xmm14, xmmword[rcx + r10], RoundingModeX64::RoundToZero), 0xc4, 0x23, 0x09, 0x0b, 0x0c, 0x11, 0x0b);
    SINGLE_COMPARE(vblendvpd(xmm7, xmm12, xmmword[rcx + r10], xmm5), 0xc4, 0xa3, 0x19, 0x4b, 0x3c, 0x11, 0x50);

    SINGLE_COMPARE(vpshufps(xmm7, xmm12, xmmword[rcx + r10], 0b11010100), 0xc4, 0xa1, 0x18, 0xc6, 0x3c, 0x11, 0xd4);
    SINGLE_COMPARE(vpinsrd(xmm7, xmm12, xmmword[rcx + r10], 2), 0xc4, 0xa3, 0x19, 0x22, 0x3c, 0x11, 0x02);

    SINGLE_COMPARE(vdpps(xmm7, xmm12, xmmword[rcx + r10], 2), 0xc4, 0xa3, 0x19, 0x40, 0x3c, 0x11, 0x02);
}

// Expected encodings for instructions not covered by the other groups:
// int3, ud2, bit scans (bsr/bsf) and bswap on 32-bit and 64-bit registers.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "MiscInstructions")
{
    SINGLE_COMPARE(int3(), 0xcc);
    SINGLE_COMPARE(ud2(), 0x0f, 0x0b);
    SINGLE_COMPARE(bsr(eax, edx), 0x0f, 0xbd, 0xc2);
    SINGLE_COMPARE(bsf(eax, edx), 0x0f, 0xbc, 0xc2);
    SINGLE_COMPARE(bswap(eax), 0x0f, 0xc8);
    SINGLE_COMPARE(bswap(r12d), 0x41, 0x0f, 0xcc);
    SINGLE_COMPARE(bswap(rax), 0x48, 0x0f, 0xc8);
    SINGLE_COMPARE(bswap(r12), 0x49, 0x0f, 0xcc);
}

// lea of a label that is placed later in the same code stream.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "LabelLea")
{
    CHECK(check(
        [](AssemblyBuilderX64& build)
        {
            Label fn;
            build.lea(rax, fn);
            build.ret();

            build.setLabel(fn);
            build.ret();
        },
        // displacement is 1: `fn` sits right after the 1-byte ret that follows the lea
        {0x48, 0x8d, 0x05, 0x01, 0x00, 0x00, 0x00, 0xc3, 0xc3}
    ));
}

// Builds a sequence of instructions with text logging enabled and compares the accumulated
// build.text against the expected listing. The order of the builder calls below must match
// the order of the lines in `expected`.
TEST_CASE("LogTest")
{
    AssemblyBuilderX64 build(/* logText= */ true);

    // Only the first align is expected to emit padding (a 6-byte nop after the push);
    // the two following align calls are expected to log just their comment line
    build.push(r12);
    build.align(8);
    build.align(8, AlignmentDataX64::Int3);
    build.align(8, AlignmentDataX64::Ud2);

    build.add(rax, rdi);
    build.add(rcx, 8);
    build.sub(dword[rax], 0x1fdc);
    build.and_(dword[rcx], 0x37);
    build.mov(rdi, qword[rax + rsi * 2]);
    build.vaddss(xmm0, xmm0, dword[rax + r14 * 2 + 0x1c]);

    // Labels are expected to be printed as .L<n> in the listing
    Label start = build.setLabel();
    build.cmp(rsi, rdi);
    build.jcc(ConditionX64::Equal, start);
    build.lea(rcx, start);
    build.lea(rcx, addr[rdx]);

    build.jmp(qword[rdx]);
    build.vaddps(ymm9, ymm12, ymmword[rbp + 0xc]);
    // The f64 constant operand is expected to be printed relative to `.start`
    build.vaddpd(ymm2, ymm7, build.f64(2.5));
    build.neg(qword[rbp + r12 * 2]);
    build.mov64(r10, 0x1234567812345678ll);
    build.vmovapd(xmmword[rax], xmm11);
    build.movzx(eax, byte[rcx]);
    build.movsx(rsi, word[r12]);
    build.imul(rcx, rdx);
    build.imul(rcx, rdx, 8);
    build.vroundsd(xmm1, xmm2, xmm3, RoundingModeX64::RoundToNearestEven);
    build.add(rdx, qword[rcx - 12]);
    build.pop(r12);
    build.cmov(ConditionX64::AboveEqual, rax, rbx);
    build.ret();
    build.int3();

    build.nop();
    build.nop(2);
    build.nop(3);
    build.nop(4);
    build.nop(5);
    build.nop(6);
    build.nop(7);
    build.nop(8);
    build.nop(9);

    build.finalize();

    // The raw string starts with a newline for readability; the comparison below
    // prepends "\n" to build.text to account for it
    std::string expected = R"(
 push        r12
; align 8
 nop         word ptr[rax+rax] ; 6-byte nop
; align 8 using int3
; align 8 using ud2
 add         rax,rdi
 add         rcx,8
 sub         dword ptr [rax],1FDCh
 and         dword ptr [rcx],37h
 mov         rdi,qword ptr [rax+rsi*2]
 vaddss      xmm0,xmm0,dword ptr [rax+r14*2+01Ch]
.L1:
 cmp         rsi,rdi
 je          .L1
 lea         rcx,.L1
 lea         rcx,[rdx]
 jmp         qword ptr [rdx]
 vaddps      ymm9,ymm12,ymmword ptr [rbp+0Ch]
 vaddpd      ymm2,ymm7,qword ptr [.start-8]
 neg         qword ptr [rbp+r12*2]
 mov         r10,1234567812345678h
 vmovapd     xmmword ptr [rax],xmm11
 movzx       eax,byte ptr [rcx]
 movsx       rsi,word ptr [r12]
 imul        rcx,rdx
 imul        rcx,rdx,8
 vroundsd    xmm1,xmm2,xmm3,8
 add         rdx,qword ptr [rcx-0Ch]
 pop         r12
 cmovae      rax,rbx
 ret
 int3
 nop
 xchg        ax, ax ; 2-byte nop
 nop         dword ptr[rax] ; 3-byte nop
 nop         dword ptr[rax] ; 4-byte nop
 nop         dword ptr[rax+rax] ; 5-byte nop
 nop         word ptr[rax+rax] ; 6-byte nop
 nop         dword ptr[rax] ; 7-byte nop
 nop         dword ptr[rax+rax] ; 8-byte nop
 nop         word ptr[rax+rax] ; 9-byte nop
)";

    CHECK("\n" + build.text == expected);
}

// Checks both the code and the data produced when instructions reference builder-created
// constants (i64, f32, f64, f32x4, raw bytes, f64x2).
// In the expected data the constants appear in reverse order of creation (f64x2 first, i64 last),
// with zero padding where noted. The four bytes ending each constant-referencing instruction
// are negative 32-bit displacements.
TEST_CASE_FIXTURE(AssemblyBuilderX64Fixture, "Constants")
{
    // clang-format off
    CHECK(check(
        [](AssemblyBuilderX64& build) {
            build.xor_(rax, rax);
            build.add(rax, build.i64(0x1234567887654321));
            build.vmovss(xmm2, build.f32(1.0f));
            build.vmovsd(xmm3, build.f64(1.0));
            build.vmovaps(xmm4, build.f32x4(1.0f, 2.0f, 4.0f, 8.0f));
            char arr[16] = "hello world!123";
            build.vmovupd(xmm5, build.bytes(arr, 16, 8));
            build.vmovapd(xmm5, build.f64x2(5.0, 6.0));
            build.ret();
        },
        // Expected code, one instruction per row
        {
            0x48, 0x33, 0xc0,
            0x48, 0x03, 0x05, 0xee, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x7a, 0x10, 0x15, 0xe1, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x7b, 0x10, 0x1d, 0xcc, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x78, 0x28, 0x25, 0xab, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x79, 0x10, 0x2d, 0x92, 0xff, 0xff, 0xff,
            0xc4, 0xe1, 0x79, 0x28, 0x2d, 0x79, 0xff, 0xff, 0xff,
            0xc3
        },
        // Expected data: f64x2(5.0, 6.0), the 16 raw bytes, f32x4, f64(1.0), f32(1.0f), i64
        {
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x40,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x40,
            'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', '1', '2', '3', 0x0,
            0x00, 0x00, 0x80, 0x3f,
            0x00, 0x00, 0x00, 0x40,
            0x00, 0x00, 0x80, 0x40,
            0x00, 0x00, 0x00, 0x41,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // padding to align f32x4
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f,
            0x00, 0x00, 0x00, 0x00, // padding to align f64
            0x00, 0x00, 0x80, 0x3f,
            0x21, 0x43, 0x65, 0x87, 0x78, 0x56, 0x34, 0x12,
        }));
    // clang-format on
}

// Adds 3001 distinct i32 constants (0..3000) and verifies the size and contents of the
// finalized data buffer.
TEST_CASE("ConstantStorage")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    for (int value = 0; value <= 3000; value++)
        build.vaddss(xmm0, xmm0, build.i32(value));

    build.finalize();

    // 3001 constants, 4 bytes each
    CHECK(build.data.size() == 12004);

    // Slot k is expected to hold 3000 - k, low byte first: the last constant added comes first in the data
    for (int slot = 0; slot <= 3000; slot++)
    {
        const int expected = 3000 - slot;
        const int offset = slot * 4;

        CHECK(build.data[offset + 0] == (expected & 0xff));
        CHECK(build.data[offset + 1] == (expected >> 8));
        CHECK(build.data[offset + 2] == 0x00);
        CHECK(build.data[offset + 3] == 0x00);
    }
}

// Requests the same f32 constant 3001 times and verifies that the finalized data buffer
// contains it only once.
TEST_CASE("ConstantStorageDedup")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    int requests = 0;
    while (requests <= 3000)
    {
        build.vaddss(xmm0, xmm0, build.f32(1.0f));
        requests++;
    }

    build.finalize();

    // A single 4-byte entry is expected
    CHECK(build.data.size() == 4);

    // Bytes of 1.0f (0x3f800000), least significant byte first
    const uint8_t expectedBytes[4] = {0x00, 0x00, 0x80, 0x3f};

    for (int i = 0; i < 4; i++)
        CHECK(build.data[i] == expectedBytes[i]);
}

// Verifies that asking for an f64 constant again, after many other constants have been added,
// yields an operand with the same `imm` as the first request.
TEST_CASE("ConstantCaching")
{
    AssemblyBuilderX64 build(/* logText= */ false);

    const OperandX64 firstRequest = build.f64(2);

    // Force data relocation
    int next = 0;
    while (next < 4096)
    {
        build.f64(next);
        next++;
    }

    const OperandX64 secondRequest = build.f64(2);
    CHECK(secondRequest.imm == firstRequest.imm);

    build.finalize();
}

// Closes the test suite opened by TEST_SUITE_BEGIN("x64Assembly") earlier in this file
TEST_SUITE_END();
